#!/usr/bin/python
"""
Converts BLASTP output file (m8) to OrthoMCL compliant format

Usage: dissertation_RenameOrthomclCompliant.py blastfile.blastp accessions.list
Note: the accessions.list file should contain a list of bioproject ids
      (arbitrary), one per line. For each id there must be a matching
      <id>.locus_tags file listing all locus_tags for that organism,
      one per line.

"""

import sys
from Bio import SeqIO

# Number of tab-separated columns copied from every BLAST row; any columns
# beyond these are dropped.
_NUM_COLUMNS = 12

# Python 2 needs "U" to get universal-newline handling; Python 3 text mode
# already translates newlines and rejects the "U" flag from 3.11 onwards.
_READ_MODE = "r" if sys.version_info[0] >= 3 else "rU"


def load_conversion_table(accession_path):
    """Build the locus_tag -> "accession|locus_tag" lookup table.

    accession_path names a text file with one accession id per line. For
    every id, the file "<id>.locus_tags" is read (the path is formed by plain
    string concatenation, so it is resolved relative to the current working
    directory unless the id itself contains a directory part) and each
    locus tag in it is mapped to "<id>|<locus_tag>".

    Blank lines in either file are ignored. If the same locus tag appears
    under several accessions, the last one read wins.

    Raises IOError/OSError if any of the files cannot be opened.
    """
    table = {}
    with open(accession_path, _READ_MODE) as accessions:
        for raw_accession in accessions:
            accession = raw_accession.strip()
            if not accession:
                # A blank line would otherwise try to open ".locus_tags".
                continue
            with open(accession + ".locus_tags", _READ_MODE) as tags:
                for raw_tag in tags:
                    locus_tag = raw_tag.strip()
                    if locus_tag:
                        table[locus_tag] = accession + "|" + locus_tag
    return table


def convert_blast_lines(lines, table):
    """Yield each BLAST row with its first two columns renamed via table.

    lines is any iterable of tab-separated text lines (an open file works,
    so the input is streamed rather than loaded into memory). Only the
    first 12 columns are kept, and trailing whitespace is stripped from the
    12th. The yielded strings carry no trailing newline.

    NOTE: an id that is missing from table is replaced by an empty string,
    exactly as the script has always done; such rows are not dropped.

    Blank lines are skipped. Raises ValueError for a non-blank line with
    fewer than 12 columns.
    """
    for lineno, line in enumerate(lines, 1):
        if not line.strip():
            continue
        fields = line.split("\t")
        if len(fields) < _NUM_COLUMNS:
            raise ValueError(
                "line %d: expected at least %d tab-separated columns, got %d"
                % (lineno, _NUM_COLUMNS, len(fields))
            )
        fields = fields[:_NUM_COLUMNS]
        fields[-1] = fields[-1].rstrip()
        fields[0] = table.get(fields[0], "")  # query locus_tag
        fields[1] = table.get(fields[1], "")  # hit locus_tag
        yield "\t".join(fields)


def main(argv=None):
    """Command-line entry point: write the converted BLAST table to stdout.

    argv defaults to sys.argv; argv[1] is the BLAST tabular file and
    argv[2] is the accession list. Exits with a usage message on stderr
    when either argument is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit("Usage: %s blastfile.blastp accessions.list" % argv[0])

    table = load_conversion_table(argv[2])
    with open(argv[1], _READ_MODE) as blast:
        for converted in convert_blast_lines(blast, table):
            # sys.stdout.write behaves the same on Python 2 and Python 3,
            # unlike the print statement it replaces.
            sys.stdout.write(converted + "\n")


if __name__ == "__main__":
    main()

